In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
Loading data¶
In [3]:
df = pd.read_csv('/Users/naina/desktop/first_project/WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head()
Out[3]:
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
5 rows × 21 columns
Data basic info¶
Data shape¶
In [4]:
df.shape
Out[4]:
(7043, 21)
Data types¶
In [5]:
df.dtypes
Out[5]:
customerID object gender object SeniorCitizen int64 Partner object Dependents object tenure int64 PhoneService object MultipleLines object InternetService object OnlineSecurity object OnlineBackup object DeviceProtection object TechSupport object StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object dtype: object
Missing values¶
In [6]:
df.isna().sum()
Out[6]:
customerID 0 gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 0 Churn 0 dtype: int64
Data basic stats¶
In [7]:
df.describe()
Out[7]:
| SeniorCitizen | tenure | MonthlyCharges | |
|---|---|---|---|
| count | 7043.000000 | 7043.000000 | 7043.000000 |
| mean | 0.162147 | 32.371149 | 64.761692 |
| std | 0.368612 | 24.559481 | 30.090047 |
| min | 0.000000 | 0.000000 | 18.250000 |
| 25% | 0.000000 | 9.000000 | 35.500000 |
| 50% | 0.000000 | 29.000000 | 70.350000 |
| 75% | 0.000000 | 55.000000 | 89.850000 |
| max | 1.000000 | 72.000000 | 118.750000 |
Data basic Stats and analysis¶
Drop unwanted columns¶
In [8]:
df.drop('customerID',axis='columns',inplace=True)
Data analysis on each columns¶
Class distribution¶
In [9]:
print(df.Churn.value_counts())
fig = px.pie(df, names=df["Churn"].map({"No":"Non-churn","Yes":"Churn"}), title='Population of Churn and Non-churn group')
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Churn No 5174 Yes 1869 Name: count, dtype: int64
gender distribution¶
In [10]:
print(df.gender.value_counts())
fig = px.pie(df, names=df["gender"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
gender Male 3555 Female 3488 Name: count, dtype: int64
gender distribution by class¶
In [11]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='gender', hue='Churn', data=df)
plt.title('gender by Churn')
plt.xlabel('gender')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
SeniorCitizen distribution¶
In [12]:
print(df.SeniorCitizen.value_counts())
fig = px.pie(df, names=df["gender"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
SeniorCitizen 0 5901 1 1142 Name: count, dtype: int64
SeniorCitizen distribution by class¶
In [13]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='SeniorCitizen', hue='Churn', data=df)
plt.title('SeniorCitizen by Churn')
plt.xlabel('SeniorCitizen')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
Partner distribution¶
In [14]:
print(df.SeniorCitizen.value_counts())
fig = px.pie(df, names=df["Partner"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
SeniorCitizen 0 5901 1 1142 Name: count, dtype: int64
Partner distribution by class¶
In [15]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Partner', hue='Churn', data=df)
plt.title('Partner by Churn')
plt.xlabel('Partner')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
Dependents distribution¶
In [16]:
print(df.Dependents.value_counts())
fig = px.pie(df, names=df["Dependents"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Dependents No 4933 Yes 2110 Name: count, dtype: int64
Dependents distribution by class¶
In [17]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Dependents', hue='Churn', data=df)
plt.title('Dependents by Churn')
plt.xlabel('Dependents')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
Contract distribution¶
In [18]:
print(df.Dependents.value_counts())
fig = px.pie(df, names=df["Contract"])
fig.update_traces(textinfo='value+percent', textfont_size=18)
fig.update_layout(width=700, height=500)
fig.show()
Dependents No 4933 Yes 2110 Name: count, dtype: int64
Contract distribution by class¶
In [19]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='Contract', hue='Churn', data=df)
plt.title('Contract by Churn')
plt.xlabel('Contract')
plt.ylabel('Churn')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
Converting TotalCharges to numerical¶
In [20]:
df = df[df["TotalCharges"] != ' ']
df["TotalCharges"] = df["TotalCharges"].apply(float)
tenure analysis for churn class¶
In [21]:
tenure_churn_no = df[df.Churn=='No'].tenure
tenure_churn_yes = df[df.Churn=='Yes'].tenure
plt.xlabel("tenure")
plt.ylabel("Number Of Customers")
plt.title("Customer churn prediction ")
plt.hist([tenure_churn_yes, tenure_churn_no], rwidth=0.9, color=['blue','red'],label=['Churn=Yes','Churn=No'])
plt.legend()
Out[21]:
<matplotlib.legend.Legend at 0x175f9ce90>
monthly charges analysis¶
In [22]:
monthly_charges_no = df[df.Churn=='No'].MonthlyCharges
monthly_charges_yes = df[df.Churn=='Yes'].MonthlyCharges
plt.xlabel("Monthly Charges")
plt.ylabel("Number Of Customers")
plt.title("Customer Churn Prediction Visualiztion")
plt.hist([monthly_charges_yes, monthly_charges_no], rwidth=0.8, color=['blue','red'],label=['Churn=Yes','Churn=No'])
plt.legend()
Out[22]:
<matplotlib.legend.Legend at 0x176550290>
Contract vs PaymentMethod analysis¶
In [23]:
cross_tab = pd.crosstab(df['Contract'], df['PaymentMethod'])
sns.heatmap(cross_tab, annot=True, fmt='d', cmap='coolwarm')
plt.title('Heatmap of Contract vs. PaymentMethod')
plt.show()
contact vs InternetService¶
In [24]:
sns.set(style="whitegrid")
plt.figure(figsize=(8,4))
sns.countplot(x='InternetService', hue='Contract', data=df)
plt.title('Contract by internet service Type')
plt.xlabel('Internet service Type')
plt.ylabel('Contract Tpe')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
PaymentMethod vs InternetService¶
In [78]:
sns.set(style="whitegrid")
plt.figure(figsize=(10,6))
sns.countplot(x='PaymentMethod', hue='InternetService', data=df)
plt.title('Internet service by payment method')
plt.xlabel('Payment Method')
plt.ylabel('Internet Service')
plt.show()
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
function for printing unique values in each columns¶
In [26]:
def print_unique_col_values(df):
for column in df:
if df[column].dtypes=='object':
print(f'{column}: {df[column].unique()}')
unique values in each columns¶
In [27]:
print_unique_col_values(df)
gender: ['Female' 'Male'] Partner: ['Yes' 'No'] Dependents: ['No' 'Yes'] PhoneService: ['No' 'Yes'] MultipleLines: ['No phone service' 'No' 'Yes'] InternetService: ['DSL' 'Fiber optic' 'No'] OnlineSecurity: ['No' 'Yes' 'No internet service'] OnlineBackup: ['Yes' 'No' 'No internet service'] DeviceProtection: ['No' 'Yes' 'No internet service'] TechSupport: ['No' 'Yes' 'No internet service'] StreamingTV: ['No' 'Yes' 'No internet service'] StreamingMovies: ['No' 'Yes' 'No internet service'] Contract: ['Month-to-month' 'One year' 'Two year'] PaperlessBilling: ['Yes' 'No'] PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)' 'Credit card (automatic)'] Churn: ['No' 'Yes']
Replace No internet service and No phone service with no¶
In [28]:
df.replace('No internet service','No',inplace=True)
df.replace('No phone service','No',inplace=True)
In [29]:
print_unique_col_values(df)
gender: ['Female' 'Male'] Partner: ['Yes' 'No'] Dependents: ['No' 'Yes'] PhoneService: ['No' 'Yes'] MultipleLines: ['No' 'Yes'] InternetService: ['DSL' 'Fiber optic' 'No'] OnlineSecurity: ['No' 'Yes'] OnlineBackup: ['Yes' 'No'] DeviceProtection: ['No' 'Yes'] TechSupport: ['No' 'Yes'] StreamingTV: ['No' 'Yes'] StreamingMovies: ['No' 'Yes'] Contract: ['Month-to-month' 'One year' 'Two year'] PaperlessBilling: ['Yes' 'No'] PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)' 'Credit card (automatic)'] Churn: ['No' 'Yes']
Converting yes no columns with 0 and 1¶
In [30]:
yes_no_columns = ['Partner','Dependents','PhoneService','MultipleLines','OnlineSecurity','OnlineBackup',
'DeviceProtection','TechSupport','StreamingTV','StreamingMovies','PaperlessBilling','Churn']
for col in yes_no_columns:
df[col].replace({'Yes': 1,'No': 0},inplace=True)
In [31]:
df.head()
Out[31]:
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | 1 | 0 | 1 | 0 | 0 | DSL | 0 | 1 | 0 | 0 | 0 | 0 | Month-to-month | 1 | Electronic check | 29.85 | 29.85 | 0 |
| 1 | Male | 0 | 0 | 0 | 34 | 1 | 0 | DSL | 1 | 0 | 1 | 0 | 0 | 0 | One year | 0 | Mailed check | 56.95 | 1889.50 | 0 |
| 2 | Male | 0 | 0 | 0 | 2 | 1 | 0 | DSL | 1 | 1 | 0 | 0 | 0 | 0 | Month-to-month | 1 | Mailed check | 53.85 | 108.15 | 1 |
| 3 | Male | 0 | 0 | 0 | 45 | 0 | 0 | DSL | 1 | 0 | 1 | 1 | 0 | 0 | One year | 0 | Bank transfer (automatic) | 42.30 | 1840.75 | 0 |
| 4 | Female | 0 | 0 | 0 | 2 | 1 | 0 | Fiber optic | 0 | 0 | 0 | 0 | 0 | 0 | Month-to-month | 1 | Electronic check | 70.70 | 151.65 | 1 |
converting male female in 0 and 1¶
In [32]:
df['gender'].replace({'Female':1,'Male':0},inplace=True)
One hot encoding for categorica columns¶
In [33]:
df_dummies = pd.get_dummies(data=df, columns=['InternetService','Contract','PaymentMethod'])
df_dummies.columns
Out[33]:
Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
'PaperlessBilling', 'MonthlyCharges', 'TotalCharges', 'Churn',
'InternetService_DSL', 'InternetService_Fiber optic',
'InternetService_No', 'Contract_Month-to-month', 'Contract_One year',
'Contract_Two year', 'PaymentMethod_Bank transfer (automatic)',
'PaymentMethod_Credit card (automatic)',
'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],
dtype='object')
Data normalization for numerical columns¶
In [35]:
cols_to_scale = ['tenure','MonthlyCharges','TotalCharges']
scaler = MinMaxScaler()
df_dummies[cols_to_scale] = scaler.fit_transform(df_dummies[cols_to_scale])
Train Test split¶
In [36]:
X = df_dummies.drop('Churn',axis='columns')
y = df_dummies["Churn"]
In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43, stratify=y)
class distribution in each class¶
In [38]:
y_train.value_counts()
Out[38]:
Churn 0 4130 1 1495 Name: count, dtype: int64
Logistic regression model Training¶
In [39]:
lr = LogisticRegression()
In [40]:
lr.fit(X_train,y_train)
Out[40]:
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
prediction on test data¶
In [41]:
y_pred_lr = lr.predict(X_test)
Calculate score¶
In [42]:
acc_lr = lr.score(X_test, y_pred_lr)
print("Accuracy_lr", acc_lr)
Accuracy_lr 1.0
Calculate precision and recall¶
In [43]:
precision_lr = precision_score(y_test, y_pred_lr)
recall_lr = recall_score(y_test, y_pred_lr)
print("Precision_lr:", precision_lr)
print("Recall_lr:", recall_lr)
Precision_lr: 0.6574074074074074 Recall_lr: 0.56951871657754
Fine Tune logistic regression model¶
In [44]:
log_reg = LogisticRegression()
In [45]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
'penalty': ['l1', 'l2'],
'solver': ['liblinear', 'saga'],
'class_weight': ['balanced', None]}
grid_search_lr = GridSearchCV(log_reg, param_grid, cv=5, scoring='accuracy')
grid_search_lr.fit(X_train, y_train)
best_params_lr = grid_search_lr.best_params_
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
In [46]:
best_log_reg = LogisticRegression(**best_params_lr)
best_log_reg.fit(X_train, y_train)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
Out[46]:
LogisticRegression(C=10, penalty='l1', solver='saga')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(C=10, penalty='l1', solver='saga')
Fine tuned logistic regression model evaluation¶
In [47]:
y_pred_lr_best = best_log_reg.predict(X_test)
Calculate score¶
In [48]:
acc_lr_best = best_log_reg.score(X_test, y_test)
print("Accuracy_lr_best", acc_lr_best)
Accuracy_lr_best 0.8052594171997157
Calculate precision and recall¶
In [49]:
precision_lr_best = precision_score(y_test, y_pred_lr_best)
recall_lr_best = recall_score(y_test, y_pred_lr_best)
print("Precision_lr_best:", precision_lr_best)
print("Recall_lr_best:", recall_lr_best)
Precision_lr_best: 0.6524390243902439 Recall_lr_best: 0.5721925133689839
Train Random forest model¶
In [50]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
Out[50]:
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
prediction on test data¶
In [51]:
y_pred_rf = rf_classifier.predict(X_test)
Calculate score¶
In [52]:
acc_rf = rf_classifier.score(X_test, y_test)
print("Accuracy_fr", acc_rf)
Accuracy_fr 0.7910447761194029
Calculate precision and recall¶
In [53]:
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
print("Precision_rf:", precision_rf)
print("Recall_rf:", recall_rf)
Precision_rf: 0.6282051282051282 Recall_rf: 0.5240641711229946
Fine Tune the random forest model¶
In [54]:
rf_classifier1 = RandomForestClassifier()
param_grid_rf = {'n_estimators': [100, 200, 300],
'max_depth': [None, 10, 20],
'min_samples_split': [2, 5, 10]}
grid_search_rf = GridSearchCV(rf_classifier1, param_grid_rf, cv=5, scoring='accuracy')
grid_search_rf.fit(X_train, y_train)
best_params_rf = grid_search_rf.best_params_
best_rf_classifier = RandomForestClassifier(**best_params_rf)
best_rf_classifier.fit(X_train, y_train)
Out[54]:
RandomForestClassifier(max_depth=10, min_samples_split=10, n_estimators=300)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=10, min_samples_split=10, n_estimators=300)
prediction on test data¶
In [55]:
y_pred_rf_best = best_rf_classifier.predict(X_test)
Calculate score¶
In [56]:
acc_rf_best = best_rf_classifier.score(X_test, y_test)
print("Accuracy_rf_best", acc_rf_best)
Accuracy_rf_best 0.8031272210376688
Calculate precision and recall¶
In [57]:
precision_rf_best = precision_score(y_test, y_pred_rf_best)
recall_rf_best = recall_score(y_test, y_pred_rf_best)
print("Precision_rf_best:", precision_rf_best)
print("Recall_rf_best:", recall_rf_best)
Precision_rf_best: 0.6611295681063123 Recall_rf_best: 0.5320855614973262
Balancing the data with smote¶
In [58]:
smote = SMOTE(sampling_strategy='minority')
X_msote, y_smote = smote.fit_resample(X, y)
In [59]:
y_smote.value_counts()
Out[59]:
Churn 0 5163 1 5163 Name: count, dtype: int64
Train test split on balanced data¶
In [60]:
X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(X_msote, y_smote, test_size=0.2, random_state=43, stratify=y_smote)
Model training on balanced data¶
In [61]:
lr_model_smote = LogisticRegression()
lr_model_smote.fit(X_train_smote, y_train_smote)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning:
lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
Out[61]:
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
Model evaluation on smote data¶
In [62]:
y_pred_lr_smote = lr_model_smote.predict(X_test)
In [63]:
acc_lr_smote = lr_model_smote.score(X_test, y_test)
print("Accuracy_lr_smote", acc_lr_smote)
Accuracy_lr_smote 0.7626154939587776
Calculate precision and recall¶
In [64]:
precision_lr_smote = precision_score(y_test, y_pred_lr_smote)
recall_lr_smote = recall_score(y_test, y_pred_lr_smote)
print("Precision_lr_smote:", precision_lr_smote)
print("Recall_lr_smote:", recall_lr_smote)
Precision_lr_smote: 0.5367647058823529 Recall_lr_smote: 0.7807486631016043
Fine tune logistic regression model on balanced data¶
In [65]:
log_reg_smote = LogisticRegression()
param_grid_lr_smote = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
'penalty': ['l1', 'l2'],
'solver': ['liblinear', 'saga'],
'class_weight': ['balanced', None]}
grid_search_lr_smote = GridSearchCV(log_reg_smote, param_grid_lr_smote, cv=5, scoring='accuracy')
grid_search_lr_smote.fit(X_train_smote, y_train_smote)
best_params_lr_smote = grid_search_lr_smote.best_params_
best_log_reg_smote = LogisticRegression(**best_params_lr_smote)
best_log_reg_smote.fit(X_train_smote, y_train_smote)
/Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:350: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge /Users/naina/Desktop/first_project/env/lib/python3.11/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
Out[65]:
LogisticRegression(C=100, class_weight='balanced', penalty='l1',
solver='liblinear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(C=100, class_weight='balanced', penalty='l1',
solver='liblinear')Fine tuned logistic regression model evaluation¶
In [66]:
y_pred_lr_best_smote = best_log_reg_smote.predict(X_test)
Calculate score¶
In [67]:
acc_lr_best_smote = best_log_reg_smote.score(X_test, y_test)
print("Accuracy_lr_best_smote", acc_lr_best_smote)
Accuracy_lr_best_smote 0.7825159914712153
Calculate precision and recall¶
In [68]:
precision_lr_best_smote = precision_score(y_test, y_pred_lr_best_smote)
recall_lr_best_smote = recall_score(y_test, y_pred_lr_best_smote)
print("Precision_lr_best_smote:", precision_lr_best_smote)
print("Recall_lr_best_smote:", recall_lr_best_smote)
Precision_lr_best_smote: 0.567193675889328 Recall_lr_best_smote: 0.767379679144385
Train Random forest model on balanced data¶
In [69]:
rf_classifier_smote = RandomForestClassifier()
rf_classifier_smote.fit(X_train_smote, y_train_smote)
Out[69]:
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
prediction on test data¶
In [70]:
y_pred_rf_smote = rf_classifier_smote.predict(X_test)
Calculate score¶
In [71]:
acc_rf_smote= rf_classifier_smote.score(X_test, y_test)
print("Accuracy_rf_smote", acc_rf_smote)
Accuracy_rf_smote 0.837953091684435
Calculate precision and recall¶
In [72]:
precision_rf_smote = precision_score(y_test, y_pred_rf_smote)
recall_rf_smote = recall_score(y_test, y_pred_rf_smote)
print("Precision_rf_smote:", precision_rf_smote)
print("Recall_rf_smote:", recall_rf_smote)
Precision_rf_smote: 0.631768953068592 Recall_rf_smote: 0.9358288770053476
Fine Tune the random forest model on balanced data¶
In [73]:
rf_classifier2 = RandomForestClassifier()
param_grid_rf_smote = {'n_estimators': [100, 200, 300],
'max_depth': [None, 10, 20],
'min_samples_split': [2, 5, 10]}
grid_search_rf_smote = GridSearchCV(rf_classifier2, param_grid_rf_smote, cv=5, scoring='accuracy')
grid_search_rf_smote.fit(X_train_smote, y_train_smote)
best_params_rf_smote =grid_search_rf_smote.best_params_
best_rf_classifier_smote = RandomForestClassifier(**best_params_rf_smote)
best_rf_classifier_smote.fit(X_train_smote, y_train_smote)
Out[73]:
RandomForestClassifier(max_depth=20, n_estimators=200)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=20, n_estimators=200)
prediction on test data¶
In [74]:
y_pred_rf_best_smote= best_rf_classifier_smote.predict(X_test)
Calculate score¶
In [75]:
acc_rf_best_smote = best_rf_classifier_smote.score(X_test_smote, y_test_smote)
print("Accuracy_rf_best_smote", acc_rf_best_smote)
Accuracy_rf_best_smote 0.8451113262342691
Calculate precision and recall¶
In [76]:
precision_rf_best_smote = precision_score(y_test, y_pred_rf_best_smote)
recall_rf_best_smote = recall_score(y_test, y_pred_rf_best_smote)
print("Precision_rf_best_smote:", precision_rf_best_smote)
print("Recall_rf_best_smote:", recall_rf_best_smote)
Precision_rf_best_smote: 0.6345454545454545 Recall_rf_best_smote: 0.9331550802139037
In [ ]: